﻿using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;


namespace SegmentTool
{
    /// <summary>
    /// Regular-expression helpers for detecting Chinese characters and for collecting
    /// runs of whitespace-separated single characters from a text.
    /// </summary>
    public static class RegexHelper
    {
        // Regex instances are immutable and safe to share between threads, so each
        // pattern is parsed once here instead of on every call (ExtractResult used to
        // rebuild its pattern once per match inside the caller's loop).

        // Matches a single character in the U+4E00..U+9FA5 CJK ideograph range.
        private static readonly Regex ChineseCharRegex =
            new Regex("[\u4e00-\u9fa5]", RegexOptions.Compiled);

        // Matches three or more consecutive "whitespace + one word character" pairs.
        private static readonly Regex SpacedCharRunRegex =
            new Regex(@"(\s\w){3,}", RegexOptions.Compiled);

        // Matches a single whitespace character.
        private static readonly Regex WhitespaceRegex =
            new Regex(@"\s", RegexOptions.Compiled);

        /// <summary>
        /// Determines whether <paramref name="text"/> contains at least one character
        /// in the range U+4E00..U+9FA5.
        /// </summary>
        /// <param name="text">Text to inspect; may be null or empty.</param>
        /// <returns>
        /// <c>true</c> if any character of <paramref name="text"/> falls in the range;
        /// <c>false</c> otherwise, including when the text is null or empty.
        /// </returns>
        public static bool IsChinese(string text)
        {
            // Null/empty cannot contain a match; answer directly instead of letting
            // the regex engine throw ArgumentNullException for null.
            if (string.IsNullOrEmpty(text))
            {
                return false;
            }

            return ChineseCharRegex.IsMatch(text);
        }

        /// <summary>
        /// Scans <paramref name="text"/> for runs of three or more
        /// "whitespace + single word character" pairs and adds the collapsed form of
        /// each run to <paramref name="unknownList"/> if it is not already present.
        /// </summary>
        /// <param name="unknownList">List that receives the extracted strings.</param>
        /// <param name="text">Text to scan; null or empty text yields no results.</param>
        public static void ExtractUnknownChinese(List<string> unknownList, string text)
        {
            if (string.IsNullOrEmpty(text))
            {
                return;
            }

            // A successful match is always at least six characters long (three
            // whitespace/word pairs), so no emptiness check is needed per match.
            foreach (Match match in SpacedCharRunRegex.Matches(text))
            {
                ExtractResult(unknownList, match.Value);
            }
        }

        /// <summary>
        /// Strips all whitespace from <paramref name="value"/>, drops its final
        /// character, and appends the result to <paramref name="unknownList"/> unless
        /// an equal string is already in the list.
        /// </summary>
        /// <param name="unknownList">List that receives the result.</param>
        /// <param name="value">A matched run of whitespace-separated characters.</param>
        private static void ExtractResult(List<string> unknownList, string value)
        {
            value = WhitespaceRegex.Replace(value, "");
            if (value.Length < 3)
            {
                return;
            }

            // NOTE(review): the last character is always discarded; presumably it is
            // considered part of the following token rather than of this run - confirm
            // against the caller's expectations.
            value = value.Remove(value.Length - 1, 1);

            // Linear search keeps the list free of duplicates while preserving
            // insertion order.
            if (!unknownList.Contains(value))
            {
                unknownList.Add(value);
            }
        }

    }
}
